#!/usr/bin/env python3
import argparse, os, sys, json, csv, subprocess, hashlib, platform, glob, re
from pathlib import Path
from datetime import date

# Column names of the registry CSV, in the order they are written as the
# header row of a newly created registry file.
CSV_HEADER = [
    "sim_id",
    "repo_url",
    "commit_hash",
    "seed",
    "config_ref",
    "config_hash",
    "run_env",
    "metrics",
    "result_summary",
    "date_ran",
    "status",
    "outputs_hashes",
]

def sha256_bytes(b: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of the byte string *b*."""
    return hashlib.sha256(b).hexdigest()

def sha256_file(p: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *p*.

    The file is opened in binary mode and read in 1 MiB blocks, so the whole
    file is never held in memory at once. Errors from opening or reading the
    file propagate to the caller.
    """
    digest = hashlib.sha256()
    block_size = 1024 * 1024
    with open(p, "rb") as stream:
        while True:
            block = stream.read(block_size)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()

def compute_commit_hash(repo_path: Path) -> str:
    """Return the HEAD commit hash of the git repository at *repo_path*.

    Runs ``git -C <repo_path> rev-parse HEAD`` and returns its output only if
    it is exactly 40 lowercase hexadecimal characters.

    This is best effort: an empty string is returned when git cannot be
    executed, the command exits with an error (for example, the path is not a
    repository), or the output does not look like a full commit hash.
    """
    try:
        out = subprocess.check_output(
            ["git", "-C", str(repo_path), "rev-parse", "HEAD"],
            # Discard stderr instead of merging it into stdout: a warning
            # printed by git would otherwise be mixed into the parsed output
            # and make a perfectly valid hash fail the pattern check below.
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: git missing / not executable; SubprocessError: non-zero
        # exit; ValueError: undecodable output or an invalid path argument.
        return ""
    return out if re.fullmatch(r"[0-9a-f]{40}", out) else ""

def gather_run_env(container_digest: str = "") -> str:
    """Describe the current runtime environment as one ``; ``-separated string.

    The string contains, in order: OS name and release, the Python version,
    the versions of whichever of numpy/scipy/pandas/torch/jax/matplotlib can
    be imported (or ``no-key-libs-detected``), and the container digest.

    Args:
        container_digest: Digest of the container image, with or without a
            leading ``sha256:`` prefix. Empty or whitespace-only means no
            container, reported as ``container sha256:NA``.

    Returns:
        The assembled environment description.
    """
    os_part = f"{platform.system()} {platform.release()}"
    py_part = f"Python {platform.python_version()}"

    libs = []
    for mod in ("numpy", "scipy", "pandas", "torch", "jax", "matplotlib"):
        try:
            m = __import__(mod)
        except Exception:
            # Best effort: a missing or broken optional library must not
            # abort provenance collection, so it is simply left out.
            continue
        libs.append(f"{mod} {getattr(m, '__version__', 'unknown')}")
    libs_part = "; ".join(libs) if libs else "no-key-libs-detected"

    # Accept the digest with or without its "sha256:" prefix so the output
    # never reads "container sha256:sha256:...".
    digest = (container_digest or "").strip()
    if digest.lower().startswith("sha256:"):
        digest = digest[len("sha256:"):]
    cont_part = f"container sha256:{digest}" if digest else "container sha256:NA"

    return f"{os_part}; {py_part}; {libs_part}; {cont_part}"

def parse_kv_metrics(metrics_arg: str) -> dict:
    """Parse the ``--metrics`` argument into a dictionary.

    Two input forms are accepted:

    * A JSON object (text starting with ``{``). Invalid JSON, or JSON that is
      not an object, yields an empty dict.
    * Comma-separated ``key=value`` pairs. Items without ``=`` are ignored.
      Values are converted to ``bool`` (``true``/``false``, any case), then
      ``int``, then ``float`` (so scientific notation such as ``1e-4`` is
      numeric), and are otherwise kept as strings.

    Args:
        metrics_arg: Raw argument text; ``None`` or blank yields ``{}``.

    Returns:
        Mapping of metric name to parsed value.
    """
    metrics_arg = (metrics_arg or "").strip()
    if not metrics_arg:
        return {}

    if metrics_arg.startswith("{"):
        try:
            obj = json.loads(metrics_arg)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            return {}
        return obj if isinstance(obj, dict) else {}

    out = {}
    for part in metrics_arg.split(","):
        key, sep, raw = part.partition("=")
        if not sep:
            # No "=" in this item (or it is empty): nothing to record.
            continue
        key = key.strip()
        raw = raw.strip()
        lowered = raw.lower()
        if lowered in ("true", "false"):
            out[key] = lowered == "true"
            continue
        # Try int first so "3" stays an int; fall back to float so that
        # "0.5" and "1e-4" both become numbers instead of strings.
        try:
            out[key] = int(raw)
        except ValueError:
            try:
                out[key] = float(raw)
            except ValueError:
                out[key] = raw
    return out

def compute_config_hash(config_files, cli_string) -> "tuple[str, str]":
    """Compute a hash and a human-readable reference for the run configuration.

    Args:
        config_files: Iterable of config file paths (or ``None``). Empty
            entries and paths that are not existing regular files are ignored.
        cli_string: The command line used to launch the run; used only when
            no usable config file remains.

    Returns:
        ``(config_hash, config_ref)``:

        - With one or more config files: the SHA-256 of a manifest made of
          ``path:sha256(file)`` lines sorted by path, and the file paths
          joined by spaces in the order they were given.
        - Else with a CLI string: ``sha256(utf8(cli_string))`` and the CLI
          string itself.
        - Else: ``("NA", "NA")``.
    """
    candidates = (Path(x) for x in (config_files or []) if x)
    # is_file() is already False for paths that do not exist.
    existing = [p for p in candidates if p.is_file()]

    if existing:
        # Sorting makes the hash independent of the order the files were given.
        manifest = "\n".join(
            f"{p.as_posix()}:{sha256_file(p)}" for p in sorted(existing)
        )
        ref = " ".join(p.as_posix() for p in existing)
        return sha256_bytes(manifest.encode("utf-8")), ref

    if cli_string:
        return sha256_bytes(cli_string.encode("utf-8")), cli_string

    return "NA", "NA"

def compute_outputs_hashes(globs, explicit_paths):
    """Return ``path:sha256`` entries for the selected output files.

    Files come from expanding every pattern in *globs* (recursively, so
    ``**`` is honoured) plus every entry of *explicit_paths*. Only existing
    regular files are kept; their paths are normalised, de-duplicated and
    sorted. Files whose hashing raises are skipped. The entries are joined
    with ``"; "``; the result is an empty string when nothing was hashed.
    """
    candidates = [
        match
        for pattern in globs
        for match in glob.glob(pattern, recursive=True)
    ]
    candidates.extend(explicit_paths)

    unique_files = {
        os.path.normpath(candidate)
        for candidate in candidates
        if os.path.isfile(candidate)
    }

    entries = []
    for path in sorted(unique_files):
        try:
            digest = sha256_file(Path(path))
        except Exception:
            # Best effort: a file that cannot be hashed is left out.
            continue
        entries.append(f"{path}:{digest}")
    return "; ".join(entries)

def acceptance(rule: str, metrics: dict):
    """Evaluate the named acceptance rule against the reported metrics.

    Supported rules (matched case-insensitively, surrounding whitespace
    ignored):

    * ``none`` / ``unknown`` / empty: no rule, never passes.
    * ``casimir`` (aliases ``v1-casimir``, ``sim:v1-casimir-001``):
      ``I2_drift <= 1e-3``.
    * ``pivotfit`` (aliases ``v1-pivotfit``, ``sim:v1-pivotfit-002``):
      ``R2 >= 0.90``, ``RMSE <= 0.15``, ``1.8 <= pivot <= 2.2`` and
      ``monotonic`` true.
    * ``kappa``: ``kappa`` equals ``tau_nerve / t_collapse`` within 1e-6.
    * ``comm``: not yet defined, never passes.

    Args:
        rule: Rule name; ``None`` is treated as no rule.
        metrics: Mapping of metric name to value; ``None`` is treated as empty.

    Returns:
        ``(passed, message)`` where *passed* is a bool and *message* explains
        the verdict. Missing, non-numeric or otherwise unusable metrics give
        ``(False, reason)`` rather than raising.
    """
    rule = (rule or "").strip().lower()
    metrics = metrics or {}
    # float(None) raises TypeError, bad strings ValueError, huge ints OverflowError.
    bad_number = (TypeError, ValueError, OverflowError)

    if rule in ("none", "", "unknown"):
        return False, "no acceptance rule"

    if rule in ("casimir", "v1-casimir", "sim:v1-casimir-001"):
        val = metrics.get("I2_drift")
        if val is None:
            return False, "missing I2_drift metric"
        try:
            drift = float(val)
        except bad_number:
            return False, f"invalid numeric metric: I2_drift={val}"
        passed = drift <= 1e-3
        return passed, f"I2_drift={val} → {'PASS' if passed else 'FAIL'} (≤ 1e-3)"

    if rule in ("pivotfit", "v1-pivotfit", "sim:v1-pivotfit-002"):
        missing = [k for k in ["R2", "RMSE", "pivot", "monotonic"] if metrics.get(k) is None]
        if missing:
            return False, f"missing metrics: {', '.join(missing)}"
        R2 = metrics["R2"]
        RMSE = metrics["RMSE"]
        pivot = metrics["pivot"]
        monotonic = metrics["monotonic"]
        try:
            r2_value = float(R2)
            rmse_value = float(RMSE)
            pivot_value = float(pivot)
        except bad_number:
            return False, "invalid numeric metrics"
        if isinstance(monotonic, str):
            # A JSON string such as "false" must not count as true just
            # because it is a non-empty string.
            is_monotonic = monotonic.strip().lower() not in ("", "false", "0", "no")
        else:
            is_monotonic = bool(monotonic)
        ok = (
            r2_value >= 0.90
            and rmse_value <= 0.15
            and 1.8 <= pivot_value <= 2.2
            and is_monotonic
        )
        return ok, f"R2={R2}, RMSE={RMSE}, pivot={pivot}, monotonic={monotonic} → {'PASS' if ok else 'FAIL'}"

    if rule in ("kappa",):
        # Deterministic: kappa must equal tau_nerve / t_collapse (tiny float tolerance)
        try:
            tn = float(metrics.get("tau_nerve"))
            tc = float(metrics.get("t_collapse"))
            kv = float(metrics.get("kappa"))
        except bad_number:
            missing = [k for k in ["tau_nerve", "t_collapse", "kappa"] if metrics.get(k) is None]
            if missing:
                return False, "missing metrics: " + ", ".join(missing)
            return False, "invalid numeric metrics"
        if tc == 0:
            # The ratio is undefined; report a failure instead of raising
            # ZeroDivisionError.
            return False, f"tau_nerve={tn}, t_collapse={tc}, kappa={kv} → FAIL (t_collapse must be non-zero)"
        ok = abs(kv - (tn / tc)) <= 1e-6
        return ok, f"tau_nerve={tn}, t_collapse={tc}, kappa={kv} → {'PASS' if ok else 'FAIL'} (must equal tau_nerve/t_collapse)"

    if rule in ("comm",):
        return False, f"rule '{rule}' not yet defined → HOLDING"

    return False, f"unknown rule '{rule}'"

def main(argv=None):
    """Collect provenance for one simulation run and append it to the registry CSV.

    Parses the command line, resolves the commit hash (from ``--commit-hash``
    or, failing that, from the repository at ``--repo-path``), hashes the
    configuration, describes the runtime environment, evaluates the
    acceptance rule and appends one row to the registry file. The header row
    is written first when the registry file is missing or empty.

    The row's status is ``VERIFIED`` only when the commit hash is a full
    40-character hex hash and the acceptance rule passed; otherwise it is
    ``HOLDING``.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.
    """
    ap = argparse.ArgumentParser(description="Collect AR sim provenance and append to CSV.")
    ap.add_argument("--sim-id", required=True)
    ap.add_argument("--repo-url", required=True)
    ap.add_argument("--repo-path", default="")
    ap.add_argument("--commit-hash", default="")
    ap.add_argument("--seed", default="NA")
    ap.add_argument("--config-file", action="append", default=[], help="Path to config file (repeatable)")
    ap.add_argument("--cli-string", default="", help="Exact CLI used to launch the run")
    ap.add_argument("--container-digest", default="", help="sha256:... if applicable")
    ap.add_argument("--metrics", default="", help="JSON or key=value[,key=value]")
    ap.add_argument("--rule", default="none", help="casimir|pivotfit|kappa|comm|none")
    ap.add_argument("--date-ran", default=str(date.today()), help="YYYY-MM-DD")
    ap.add_argument("--registry-csv", default="AR_sim_registry.csv")
    ap.add_argument("--outputs-glob", action="append", default=[], help="Glob(s) for result files")
    ap.add_argument("--output-file", action="append", default=[], help="Explicit result file path(s)")
    ap.add_argument("--compute-output-hashes", action="store_true", help="Compute SHA256 for outputs")
    args = ap.parse_args(argv)

    # Lower-case a user-supplied hash so an upper-case spelling of a valid
    # commit hash is not rejected by the lowercase-hex check below.
    ch = args.commit_hash.strip().lower()
    if not ch and args.repo_path:
        ch = compute_commit_hash(Path(args.repo_path))

    conf_hash, conf_ref = compute_config_hash(args.config_file, args.cli_string)
    run_env = gather_run_env(args.container_digest)
    metrics = parse_kv_metrics(args.metrics)
    ok, verdict = acceptance(args.rule, metrics)
    outputs_hashes = ""
    if args.compute_output_hashes:
        outputs_hashes = compute_outputs_hashes(args.outputs_glob, args.output_file)

    metrics_str = ", ".join(f"{k}={v}" for k, v in metrics.items()) if metrics else "NA"
    # Passing and failing runs share the same summary layout; "FAILED" is
    # only a fallback for an empty verdict text.
    result_summary = f"{metrics_str} — {verdict or 'FAILED'}"

    has_full_hash = re.fullmatch(r"[0-9a-f]{40}", ch or "") is not None
    status = "VERIFIED" if (has_full_hash and ok) else "HOLDING"

    csv_path = Path(args.registry_csv)
    # An existing but empty file (e.g. created with `touch`) still needs the
    # header, otherwise the registry would start with a data row.
    write_header = (not csv_path.exists()) or csv_path.stat().st_size == 0
    with open(csv_path, "a", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        if write_header:
            w.writerow(CSV_HEADER)
        w.writerow([
            args.sim_id, args.repo_url, ch, args.seed,
            conf_ref, conf_hash, run_env, metrics_str, result_summary,
            args.date_ran, status, outputs_hashes,
        ])

    print(f"Row appended to {csv_path.as_posix()}")
    print(f"Status: {status}")
    print(f"Summary: {result_summary}")
    if status != "VERIFIED":
        print("Note: status is HOLDING until required fields and acceptance rule pass.")

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
